\documentclass[10pt,a4paper]{article} 

\usepackage{ctex} % 中文支持
\usepackage[top=2.5cm, bottom=2.5cm, left=2.5cm, right=2.5cm]{geometry} % 页边距
\usepackage{amsmath, amssymb} % 数学公式与符号
\usepackage{graphicx}
\usepackage{pythonhighlight}
\usepackage{url} 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{titling}
\setlength{\droptitle}{-2cm} % 标题上移

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 使表格美观
\usepackage{array}
\newcolumntype{M}[1]{>{\centering\arraybackslash}m{#1}}
\setlength\extrarowheight{3pt}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%文档的题目、作者与日期
\author{五六七 }
\title{不同工艺生产的灯泡寿命与方差分析 }

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\maketitle

\begin{abstract}
使用方差分析的方法来分析不同工艺生产的灯泡的寿命是否有显著差异。

\end{abstract}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\setcounter{tocdepth}{2}
%\renewcommand\contentsname{目录}
%
%\renewcommand {\baselinestretch} {1.3}\normalsize 
%\tableofcontents 
%\renewcommand {\baselinestretch} {1.0}\normalsize


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{问题描述}
用四种工艺A1, A2, A3, A4生产灯泡，从各种工艺制成的灯泡中各抽出了若干个测量其寿命，结果如表所示。
试推断这几种工艺制成的灯泡寿命是否有显著差异。

\begin{table}[ht!]\centering
\caption{四种工艺生产的灯泡寿命} \vspace{0.2cm}
\begin{tabular}{|M{2cm}|M{2cm}|M{2cm}|M{2cm}|}\hline
A1 & A2 & A3 & A4  \\ \hline 
1620 & 1580 & 1460 & 1500  \\ \hline 
1670 & 1600 & 1540 & 1550  \\ \hline 
1700 & 1640 & 1620 & 1610  \\ \hline 
1750 & 1720 & 1680 &   \\ \hline 
1800 &  &  &   \\ \hline 
\end{tabular}
\end{table}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{建立模型}
\subsection{化为假设检验问题的基本思路}
设四种工艺生产的灯泡的寿命分别服从正态分布
\begin{eqnarray}
X_i \sim N(\mu_i, \sigma_i^2), \,\, i=1,2,3,4. 
\end{eqnarray}
为简单起见，我们假设方差相等，即有
\begin{eqnarray}
\sigma_1^2 = \sigma_2^2 = \sigma_3^2 = \sigma_4^2 = \sigma^2. 
\end{eqnarray}
这样，我们把问题转化为一个假设检验问题
\begin{eqnarray}
H_0: \mu_1=\mu_2=\mu_3=\mu_4 \,\,\,\,\,v.s.\,\,\,\,\, H_1: \mu_1, \mu_2, \mu_3, \mu_4 \text{互不相等。}
\end{eqnarray}

\subsection{写成一个更具体的统计模型}
设第 $i$ 种工艺生产的灯泡共有 $n_i$ 个，其寿命分别为 $X_{i1}, X_{i2},\cdots,X_{in_i}$. 则有 
\begin{eqnarray}
X_{ij} = \mu_i + \varepsilon_{ij}, 
\end{eqnarray}
其中我们设误差服从正态分布，即 $\varepsilon_i \sim N(0,\sigma^2)$. 
记 $n= n_1+n_2+n_3+n_4$ 是检查的灯泡总数。记 
\begin{eqnarray}
\mu = \frac{1}{n} (n_1\mu_1+  n_2\mu_2+  n_3\mu_3+  n_4\mu_4)
\end{eqnarray}
是总均值，记 
\begin{eqnarray}
\alpha_i = \mu_i - \mu, \,\,\, i =1,2,3,4,
\end{eqnarray}
是每种工艺的灯泡寿命的均值与总均值的差，也称为指标 $A_i$ 的效应。

在引入了上述符号之后，我们可以把这个统计模型写成如下形式，
\begin{eqnarray}
\left\{\begin{array}{l}
X_{1j} = \mu + \alpha_1 + \varepsilon_{1j}, \,\,\,\, j=1,2,3,4,5, \\
X_{2j} = \mu + \alpha_2 + \varepsilon_{2j}, \,\,\,\, j=1,2,3,4, \\
X_{3j} = \mu + \alpha_3 + \varepsilon_{3j}, \,\,\,\, j=1,2,3,4, \\
X_{4j} = \mu + \alpha_4 + \varepsilon_{4j}, \,\,\,\, j=1,2,3, \\
n_1\alpha_1 + n_2\alpha_2 + n_3\alpha_3 + n_4\alpha_4 = 0, \\  
\varepsilon_{ij} \sim N(0, \sigma^2). \\ 
\end{array}\right. 
\end{eqnarray}
其中 $\mu, \alpha_1, \alpha_2, \alpha_3, \alpha_4, \sigma^2$ 是参数。 
我们要检验的原假设为
\begin{eqnarray}
H_0: \alpha_1=\alpha_2=\alpha_3=\alpha_4 = 0. 
\end{eqnarray}

\subsection{三个平方和 }
接下来我们将要构造一个检验统计量。思路是考虑这些分组数据的不同形式的方差。首先计算每个小组的样本均值
\begin{eqnarray}
\left\{\begin{array}{rcl}
\bar{X}_{1\cdot} &=& \frac{1}{5}(X_{11} + X_{12} + X_{13} + X_{14} + X_{15}), \\  
\bar{X}_{2\cdot} &=& \frac{1}{4}(X_{21} + X_{22} + X_{23} + X_{24}), \\  
\bar{X}_{3\cdot} &=& \frac{1}{4}(X_{31} + X_{32} + X_{33} + X_{34}), \\  
\bar{X}_{4\cdot} &=& \frac{1}{3}(X_{41} + X_{42} + X_{43}). 
\end{array}\right. 
\end{eqnarray}
然后计算所有数据的样本均值
\begin{eqnarray}
\bar{X} = \frac{1}{5+4+4+3}(5\bar{X}_{1\cdot} + 4\bar{X}_{2\cdot} + 4\bar{X}_{3\cdot} + 3\bar{X}_{4\cdot}). 
\end{eqnarray}

接下来计算所有数据的总离差平方和，
\begin{eqnarray}
S_T &=& \sum\limits_{i=1}^{4} \sum\limits_{j=1}^{n_i} (X_{ij}-\bar{X})^2 \\ \
&=& (X_{11}-\bar{X})^2 + (X_{12}-\bar{X})^2 + (X_{13}-\bar{X})^2 + (X_{14}-\bar{X})^2 + (X_{15}-\bar{X})^2 \nonumber \\ 
&& + (X_{21}-\bar{X})^2 + (X_{22}-\bar{X})^2 + (X_{23}-\bar{X})^2 + (X_{24}-\bar{X})^2 \nonumber \\ 
&& + (X_{31}-\bar{X})^2 + (X_{32}-\bar{X})^2 + (X_{33}-\bar{X})^2 + (X_{34}-\bar{X})^2 \nonumber \\ 
&& + (X_{41}-\bar{X})^2 + (X_{42}-\bar{X})^2 + (X_{43}-\bar{X})^2. \nonumber 
\end{eqnarray}

又可以计算组间平方和，即每组的均值与总均值的差异的平方和，
\begin{eqnarray}
S_A &=& \sum\limits_{i=1}^{4} \sum\limits_{j=1}^{n_i} (\bar{X}_{i\cdot}-\bar{X})^2 = \sum\limits_{i=1}^{4} n_i (\bar{X}_{i\cdot}-\bar{X})^2 \\ \
&=& n_1 (\bar{X}_{1\cdot}-\bar{X})^2 + n_2 (\bar{X}_{2\cdot}-\bar{X})^2 + n_3 (\bar{X}_{3\cdot}-\bar{X})^2 + n_4 (\bar{X}_{4\cdot}-\bar{X})^2. 
\end{eqnarray}

还可以计算组内平方和，也称为随机误差平方和，
\begin{eqnarray}
S_E &=& \sum\limits_{i=1}^{4} \sum\limits_{j=1}^{n_i} (\bar{X}_{ij}-\bar{X}_{i\cdot})^2 \\ \
&=& (X_{11}-\bar{X}_{1\cdot})^2 + (X_{12}-\bar{X}_{1\cdot})^2 + (X_{13}-\bar{X}_{1\cdot})^2 + (X_{14}-\bar{X}_{1\cdot})^2 + (X_{15}-\bar{X}_{1\cdot})^2 \nonumber \\ 
&& + (X_{21}-\bar{X}_{2\cdot})^2 + (X_{22}-\bar{X}_{2\cdot})^2 + (X_{23}-\bar{X}_{2\cdot})^2 + (X_{24}-\bar{X}_{2\cdot})^2 \nonumber \\ 
&& + (X_{31}-\bar{X}_{3\cdot})^2 + (X_{32}-\bar{X}_{3\cdot})^2 + (X_{33}-\bar{X}_{3\cdot})^2 + (X_{34}-\bar{X}_{3\cdot})^2 \nonumber \\ 
&& + (X_{41}-\bar{X}_{4\cdot})^2 + (X_{42}-\bar{X}_{4\cdot})^2 + (X_{43}-\bar{X}_{4\cdot})^2. \nonumber 
\end{eqnarray}

因为 $X_{ij}-\bar{X} = (X_{ij} - \bar{X}_{i\cdot}) + (\bar{X}_{i\cdot} - \bar{X})$, 代入 $S_T$ 的计算公式，经过一些代数运算可知有等式成立 
\begin{eqnarray}
S_T = S_A + S_E. 
\end{eqnarray}

\subsection{构造检验统计量}
方差分析的一个关键思路是这样的。当小组间的差异很小时，即原假设 
\begin{eqnarray}
H_0: \alpha_1=\alpha_2=\alpha_3=\alpha_4 = 0. 
\end{eqnarray}
成立时，组间平方和 $S_A$ 在总离差平方和中的比重会很小。为此我们考虑统计量
\begin{eqnarray}
\frac{S_A}{S_E}. 
\end{eqnarray}
当原假设成立时，这个统计量的取值应该接近零。为了方便计算，我们考虑统计量
\begin{eqnarray}
F = \frac{S_A/(r-1)}{S_E/(n-r)}, 
\end{eqnarray}
其中 $r$ 为分组的数目，本题 $r=4$ 组。$n$ 为样本总数，本题为 $n=5+4+4+3=16$. 
从数理统计的知识可以证明，这个统计量服从 $F(r-1,n-r)$ 分布。本题为 $F(3,12)$ 分布。


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{编程计算}

载入数据分析包 pandas, 数值计算包 numpy 和统计模型包 statsmodels. 
\begin{python}
import pandas as pd
import numpy as np
import statsmodels.api as sm
\end{python}

从excel 文件读入数据，创建字典型数据。
\begin{python}
a = pd.read_excel('data9_18.xlsx', header=None)
b = a.values.T 
y = b[~np.isnan(b)]
x = np.hstack([np.ones(5), np.full(4,2), np.full(4,3), np.full(3,4)])
d = {'x':x, 'y':y}
\end{python}

构建最小二乘回归模型，进行单因素方差分析。
\begin{python}
model = sm.formula.ols('y~C(x)', d).fit() 
anovat = sm.stats.anova_lm(model)
print(anovat)
\end{python}

程序运行结果的方差分析表如下。
\begin{python}
            df        sum_sq       mean_sq         F    PR(>F)
C(x)       3.0  60153.333333  20051.111111  3.727742  0.042004
Residual  12.0  64546.666667   5378.888889       NaN       NaN
\end{python}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{回答问题}

因为最后的$p$值为 $0.042$, 小于默认的 $\alpha=0.05$, 所以认为原假设不成立，认为不同工艺生产的灯泡的寿命有较大的差异。

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\section{参考文献 }
\begin{thebibliography}{99}

\bibitem{sishoukui-2} 司守奎,孙玺菁. \emph{Python数学建模算法与应用}, 国防工业出版社. 2022年1月第1版. 
\bibitem{maosisong} 茆诗松, 程依明, 濮晓龙. \emph{概率论与数理统计教程}. 高等教育出版社. 2019年11月第3版.


\end{thebibliography}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%





